####################################################################
# Replication code for
#   Lyall, Shiraito, and Imai, "Coethnic Bias and Wartime Informing"
#
# Code to run Gibbs sampler for simulating the posterior
# distribution of the ordered probit model
#
# Author: Yuki Shiraito
# Created: March 3, 2015
####################################################################

rm(list = ls())

library(MCMCpack)
library(foreach)
library(doMC)
library(doRNG)

load("data.RData")


### ordered probit regression of retaliation question
#### outcome variable for ordered probit regression
data$retal_all <- data$retal_ff
data$retal_all <- ifelse(!is.na(data$retal_all), data$retal_all, data$retal_pashtun)
data$retal_all <- ifelse(!is.na(data$retal_all), data$retal_all, data$retal_tajik)
data$retal_all <- ifelse(data$retal_all == 98 | data$retal_all == 99, NA,
                         data$retal_all)
data$retal_all <- as.ordered(data$retal_all)
#### endorser variable for ordered probit regression
data$endorser <- ifelse(!is.na(data$retal_pashtun), 1, 0)
data$endorser <- ifelse(!is.na(data$retal_tajik), 2, data$endorser)
data$endorser <- factor(data$endorser, levels = 0:2, labels = c("ISAF", "Pashtun",
                                                       "Tajik"))
#### ethnicity variable for ordered probit regression
data$tajik <- factor(data$ethnicity, levels = 0:1, labels = c("Pashtun", "Tajik"))
#### enumerator variable for ordered probit regression (combine Hazara and Uzbek)
data$enum <- ifelse(data$M15 == "Tajik", 1, 0)
data$enum <- ifelse(data$M15 == "Uzbek", 2, data$enum)
data$enum <- ifelse(data$M15 == "Hazara", 2, data$enum)
data$enum <- factor(data$enum, levels = 0:2, labels = c("Pashtun", "Tajik", "Others"))


#### Bayesian Hierarchical model
data$retal_all <- as.numeric(data$retal_all)

data.village$random.effects <- 0

formula.indiv <- formula(retal_all ~ factor(village_id) +
                         endorser +
                         ## ethnicity: indicator of ethno.tribe
                         ethno.tribe +
                         ethno.tribe : endorser +
                         ## socioeconomic variables 
                         mon_income + age + school_years +
                         mon_income : endorser + age : endorser + school_years : endorser +
                         ## contact variables and their interactions
                         contact_ff + contact_anp + contact_otherethno + 
                         contact_ff : ethno.tribe + contact_anp : ethno.tribe +
                         contact_ff : endorser + contact_anp : endorser + contact_otherethno : endorser + 
                         contact_ff : ethno.tribe : endorser + contact_anp : ethno.tribe : endorser +
                         ## harm/damage variables
                         damagebyff.ProTaliban + damagebyff.Other + damagebyff.Tajik +
                         damagebyanp.ProTaliban + damagebyanp.Other + damagebyanp.Tajik+
                         damagebytaliban.ProTaliban + damagebytaliban.Other + damagebytaliban.Tajik + 
                         ## harm/damage variables
                         damagebyff.ProTaliban : endorser + damagebyff.Other : endorser + damagebyff.Tajik : endorser +
                         damagebyanp.ProTaliban : endorser + damagebyanp.Other : endorser + damagebyanp.Tajik : endorser +
                         damagebytaliban.ProTaliban : endorser + damagebytaliban.Other : endorser + damagebytaliban.Tajik : endorser )
formula.village <- formula(random.effects
                           ~ ISAF.sigacts.2 + Insurg.sigacts.2 +
                           ## IMMAP
                           Unknown.immap.2 + ISAF.immap.2 + Insurg.immap.2 +
                           ## aid
                           nsp.0 +
                           ## military installations
                           base.2 +
                           ## geography
                           log.altitude + log.population + percent_pashtun +
                           taliban.control + contested)

model.matrix.indiv <- model.matrix(formula.indiv, data)
varname.indiv <- colnames(model.matrix.indiv)
model.matrix.village <- model.matrix(formula.village, data.village)
varname.village <- colnames(model.matrix.village)

B0 <- sigma2 <- beta0 <- gamma <- beta <- beta.store <- gamma.store <- sigma2.store <- list()

for (j in 1:3) {

  B0[[j]] <- matrix(0, ncol = ncol(model.matrix.indiv), nrow = ncol(model.matrix.indiv))
  diag(B0[[j]])[101:ncol(model.matrix.indiv)] <- .1
  sigma2[[j]] <- 1 / rgamma(1, shape = 20 / 2, scale = 1 / 2)
  diag(B0[[j]])[1:100] <- c(sigma2[[j]], rep(sigma2[[j]] * 2, times = 99))
  beta0[[j]] <- rep(0, length(varname.indiv))

  gamma[[j]] <- rnorm(length(varname.village), sd = c(.9, .02, .005, .18, .01, .006, .15, .15, .15, .1,
                                                   .003, .22, .22))
  beta[[j]] <- numeric(length(varname.indiv))
  beta0[[j]][1:100] <- c((model.matrix.village %*% gamma[[j]])[1],
                    (model.matrix.village %*% gamma[[j]])[2:100] - (model.matrix.village %*% gamma[[j]])[1])
  beta[[j]] <- rnorm(ncol(model.matrix.indiv), mean = beta0[[j]], sd = sqrt(1))
}


n.chains <- 3

registerDoMC(n.chains)
registerDoRNG()

MCMC <- 1000
burn <- 500
k <- 1
max.iterate <- 3  # max.iterate * MCMC is the maximum number of iterations

repeat {

  print(k)
  
  post.sample <- foreach(i.chain = 1:n.chains) %dorng% {

      beta.store <- NULL
      gamma.store <- NULL
      sigma2.store <- NULL
      tau.store <- NULL
      villageRE.store <- NULL

      for (i in 1:MCMC) {

          Bayes.oprobit.out <- MCMCoprobit(formula.indiv,
                                           beta.start = beta[[i.chain]],
                                           b0 = beta0[[i.chain]],
                                           B0 = B0[[i.chain]],
                                           A0 = .1,
                                           data = data, burnin = 2, mcmc = 3)
          beta[[i.chain]] <- Bayes.oprobit.out[3, varname.indiv]

          if (i burn) {
              beta.store <- rbind(beta.store, beta[[i.chain]][101:ncol(model.matrix.indiv)])
              tau.store <- rbind(tau.store, Bayes.oprobit.out[3, c("gamma2", "gamma3", "gamma4")])
          }

          data.village$random.effects <- c(Bayes.oprobit.out[3, 1],
                                           Bayes.oprobit.out[3, 1] + Bayes.oprobit.out[3, 2:100])
          if (i burn) {
              villageRE.store <- rbind(villageRE.store, data.village$random.effects)
          }
          
          Bayes.regress.out <- MCMCregress(formula.village,
                                           data = data.village,
                                           burnin = 0, mcmc = 1,
                                           b0 = 0,
                                           B0 = .1,
                                           c0 = 20,
                                           d0 = 1,
                                           beta.start = gamma[[i.chain]])
          gamma[[i.chain]] <- Bayes.regress.out[, varname.village]
          sigma2[[i.chain]] <- Bayes.regress.out[, "sigma2"]

          if (i burn) {
              gamma.store <- rbind(gamma.store, gamma[[i.chain]])
              sigma2.store <- rbind(sigma2.store, sigma2[[i.chain]])
          }

          beta0[[i.chain]][1:100] <- c((model.matrix.village %*% gamma[[i.chain]])[1],
                                 (model.matrix.village %*% gamma[[i.chain]])[2:100] - (model.matrix.village %*% gamma[[i.chain]])[1])
          diag(B0[[i.chain]])[1:100] <- c(1/ sigma2[[i.chain]], rep(1 / sigma2[[i.chain]] * 2, times = 99))

          if (i.chain == 1 & !(i %% (MCMC / 10))) cat("*")
      }

      beta.store <- mcmc(beta.store, start = burn + 1, end = MCMC)
      gamma.store <- mcmc(gamma.store, start = burn + 1, end = MCMC)
      sigma2.store <- mcmc(sigma2.store, start = burn + 1, end = MCMC)
      tau.store <- mcmc(tau.store, start = burn + 1, end = MCMC)

      villageRE.store <- mcmc(villageRE.store, start = burn + 1, end = MCMC)

      temp <- list(as.mcmc(cbind(beta.store, gamma.store, sigma2.store, tau.store)),
                   villageRE.store)
      return(temp)
  }

  results.saved <- list(mcmc.list(post.sample[[1]][[1]],
                                  post.sample[[2]][[1]],
                                  post.sample[[3]][[1]]),
                        list(post.sample[[1]][[2]],
                             post.sample[[2]][[2]],
                             post.sample[[3]][[2]]))

  converge.diag <- gelman.diag(results.saved[[1]])

  k <- k + 1
  
  if((max(converge.diag$psrf[, "Upper C.I."]) < 1.5 & converge.diag$mpsrf < 1.5) |
     k > max.iterate) break

}
save(results.saved, file = "output.oprobit.RData")
